Regression modeling
with Guediawaye Census data
outcome = "MPI_men"
vars_to_remove = c("id_dr","REGION","DEPT","NOM_ARR","NOM_CA","RGPH","homme", "femme","hh_size","Aucun","Superir","Ltrcy_P","Ltrcy_S","Ltrcy_D","rsdnt_p","rsdnt_b","I_rps_s","I_sns_m","MPI_men","mdn_cm_","mn_cm_g","cm_homm","cm_femm","pct_cm_h","geometry")
labels_var = c("Id dr","Region","Dept.","Nom Arr.","Nom CA","RGPH","Nombre d'homme","Nombre de femme","Taille ménage","Fertilité","Niveau d'instruction: Aucun","Niveau d'instruction: Primaire","Niveau d'instruction: Moyen","Niveau d'instruction: Secondaire", "Niveau d'instruction: Supérieur","Nombre de French dans le ménage","Nombre de Arabic dans le ménage","Nombre de Wolof dans le ménage","Nombre de Pulaar dans le ménage","Nombre de Sereer dans le ménage","Nombre de Mandingo dans le ménage","Nombre de Diola dans le ménage","Nombre de Soninke dans le ménage","Nombre de résident permanent","Nombre de Résident Absent","Nombre de repas sauté","Nombre de soins médicaux","MPI","Nombre de ménage dans le DR","Age médian du CM dans le DR","Age moyen du CM dans le DR","Nombre de CM de sexe masculin","Nombre de CM de sexe feminin","Proportion de CM de sexe masculin","Proportion de CM de sexe féminin","geometry")
label_to_remove = c("Id dr","Region","Dept.","Nom Arr.","Nom CA","RGPH","Nombre d'homme","Nombre de femme","Taille ménage","Niveau d'instruction: Aucun", "Niveau d'instruction: Supérieur","Nombre de Pulaar dans le ménage","Nombre de Sereer dans le ménage","Nombre de Diola dans le ménage","Nombre de résident permanent","Nombre de Résident Absent","Nombre de repas sauté","Nombre de soins médicaux","MPI","Age médian du CM dans le DR","Age moyen du CM dans le DR","Nombre de CM de sexe masculin","Nombre de CM de sexe feminin","Proportion de CM de sexe masculin","geometry")
predictors_2002 = setdiff(names(MPI_data_dr_2002),vars_to_remove)
label_predictors_2002 = append(setdiff(labels_var, label_to_remove), "densité de la population")
predictors_2013 = setdiff(names(MPI_data_dr_2013),vars_to_remove)
label_predictors_2013 = append(setdiff(labels_var, label_to_remove), "densité de la population")
formula_2002 = paste0("log(",outcome,") ~ ",paste(predictors_2002[-length(predictors_2002)]," + ", collapse = ""),predictors_2002[length(predictors_2002)])
formula_2013 = paste0("log(",outcome,") ~ ",paste(predictors_2013[-length(predictors_2013)]," + ", collapse = ""),predictors_2013[length(predictors_2013)])
formula_2002 <- paste0(formula_2002, " + pop_density")
formula_2013 <- paste0(formula_2013, " + pop_density")
Inspecting the
outcome variable (MPI) with visualization
mhv_map <- ggplot(MPI_data_dr_2013, aes(fill = MPI_men)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
theme_void() +
labs(fill = "MPI ")
mhv_histogram <- ggplot(MPI_data_dr_2013, aes(x = MPI_men)) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) +
labs(x = "MPI")
#grid.arrange(mhv_map, mhv_histogram, nrow = 1)
# mhv_map + mhv_histogram + labs(title = "MPI value charts for Guediawaye Census 2013")
gridExtra::grid.arrange(mhv_map, mhv_histogram, nrow = 1,
top = "MPI value charts for Guediawaye Census 2013")

ggsave(paste0(here::here(),"/output/output_model/img/1_hv_histogram.png"), width = 8, height = 5)
MPI_data_dr %>%
mutate(RGPH=ifelse(RGPH=="2013","Census 2013", "Census 2002")) %>%
ggplot(aes(fill = MPI_men)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
theme_void() +
labs(fill = "MPI")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)

ggsave(paste0(here::here(),"/output/output_model/img/2_mpi_rgph.png"), width = 8, height = 5)
MPI_data_dr %>%
mutate(RGPH=ifelse(RGPH=="2013","Census 2013", "Census 2002")) %>%
ggplot(aes(x = MPI_men)) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous(labels = scales::label_number_si(accuracy = 0.1)) +
xlab("MPI")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))

ggsave(paste0(here::here(),"/output/output_model/img/3_mpi_rgph_hist.png"), width = 8, height = 5)
mhv_map_log <- ggplot(MPI_data_dr_2013, aes(fill = log(MPI_men))) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
theme_void() +
labs(fill = "MPI\nvalue (log)")
mhv_histogram_log <- ggplot(MPI_data_dr_2013, aes(x = log(MPI_men))) +
geom_histogram(alpha = 0.5, fill = "navy", color = "navy",
bins = 100) +
theme_minimal() +
scale_x_continuous() +
labs(x = "MPI (log)")
gridExtra::grid.arrange(mhv_map_log, mhv_histogram_log, nrow = 1,
top = "Logged MPI value charts for Guediawaye Census 2013")

ggsave(paste0(here::here(),"/output/output_model/img/4_hv_histogram_log.png"), width = 8, height = 5)
MPI_data_dr %>%
ggplot(aes(fill = log(MPI_men))) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
theme_void() +
labs(fill = "MPI\nvalue (log)")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)

ggsave(paste0(here::here(),"/output/output_model/img/5_log_mpi_rgph.png"), width = 8, height = 5)
A first regression
model
library(sf)
library(units)
predictors = c("menage","Primair","Moyen","Secondr","Ltrcy_F","pct_cm_f","hh_size","Ltrcy_A","Ltrcy_W","Ltrcy_M","fertlty")
outcome = "MPI_men"
MPI_data_dr_2013_for_model<- MPI_data_dr_2013 %>%
dplyr::select(MPI_men,predictors) %>%
mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>%
dplyr::select(-hh_size)
##
MPI_data_dr_2002_for_model<- MPI_data_dr_2002 %>%
dplyr::select(MPI_men,predictors) %>%
mutate(pop_density = as.numeric(set_units(hh_size / st_area(.), "1/km2"))) %>%
dplyr::select(-hh_size)
# formula <- "log(MPI_men) ~ menage + Primair + Moyen + Secondr + Ltrcy_F + pct_cm_f + pop_density + Ltrcy_A + Ltrcy_W + Ltrcy_M + fertlty"
### For 2002
model_2002 <- lm(formula = formula_2002, data = MPI_data_dr_2002_for_model)
### For 2013
model_2013 <- lm(formula = formula_2013, data = MPI_data_dr_2013_for_model)
stargazer::stargazer(model_2002, model_2013, type="html",
keep = predictors_2002,
covariate.labels = label_predictors_2002, out = (paste0(here::here(),"/output/output_model/1_reg_lineaire.html")))
##
## <table style="text-align:center"><tr><td colspan="3" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"></td><td colspan="2"><em>Dependent variable:</em></td></tr>
## <tr><td></td><td colspan="2" style="border-bottom: 1px solid black"></td></tr>
## <tr><td style="text-align:left"></td><td colspan="2">log(MPI_men)</td></tr>
## <tr><td style="text-align:left"></td><td>(1)</td><td>(2)</td></tr>
## <tr><td colspan="3" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Fertilité</td><td>0.00004</td><td>0.0001</td></tr>
## <tr><td style="text-align:left"></td><td>(0.0001)</td><td>(0.0001)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Niveau d'instruction: Primaire</td><td>-0.001</td><td>0.00004</td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.0004)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Niveau d'instruction: Moyen</td><td>-0.005<sup>***</sup></td><td>-0.004<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.001)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Niveau d'instruction: Secondaire</td><td>-0.004<sup>**</sup></td><td>-0.005<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.002)</td><td>(0.001)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Nombre de French dans le ménage</td><td>0.001</td><td>-0.001<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.0003)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Nombre de Arabic dans le ménage</td><td>0.0003<sup>***</sup></td><td>0.001<sup>**</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.0001)</td><td>(0.0004)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Nombre de Wolof dans le ménage</td><td>0.002<sup>***</sup></td><td>0.0004</td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.0004)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Nombre de Mandingo dans le ménage</td><td>-0.018</td><td>-0.005</td></tr>
## <tr><td style="text-align:left"></td><td>(0.012)</td><td>(0.011)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Nombre de Soninke dans le ménage</td><td>0.005<sup>***</sup></td><td>0.009<sup>***</sup></td></tr>
## <tr><td style="text-align:left"></td><td>(0.001)</td><td>(0.001)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td style="text-align:left">Nombre de ménage dans le DR</td><td>0.095</td><td>-0.241</td></tr>
## <tr><td style="text-align:left"></td><td>(0.279)</td><td>(0.161)</td></tr>
## <tr><td style="text-align:left"></td><td></td><td></td></tr>
## <tr><td colspan="3" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left">Observations</td><td>268</td><td>435</td></tr>
## <tr><td style="text-align:left">R<sup>2</sup></td><td>0.636</td><td>0.589</td></tr>
## <tr><td style="text-align:left">Adjusted R<sup>2</sup></td><td>0.620</td><td>0.579</td></tr>
## <tr><td style="text-align:left">Residual Std. Error</td><td>0.260 (df = 256)</td><td>0.276 (df = 423)</td></tr>
## <tr><td style="text-align:left">F Statistic</td><td>40.686<sup>***</sup> (df = 11; 256)</td><td>55.161<sup>***</sup> (df = 11; 423)</td></tr>
## <tr><td colspan="3" style="border-bottom: 1px solid black"></td></tr><tr><td style="text-align:left"><em>Note:</em></td><td colspan="2" style="text-align:right"><sup>*</sup>p<0.1; <sup>**</sup>p<0.05; <sup>***</sup>p<0.01</td></tr>
## </table>
#library(corrr)
### 2002
dfw_estimates_2002 <- MPI_data_dr_2002_for_model%>%
select(-MPI_men) %>%
st_drop_geometry()
correlations <- correlate(dfw_estimates_2002, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2002")

ggsave(paste0(here::here(),"/output/output_model/img/6_corr_var.png"), width = 8, height = 5)
### 2013
dfw_estimates_2013 <- MPI_data_dr_2013_for_model%>%
select(-MPI_men) %>%
st_drop_geometry()
correlations <- correlate(dfw_estimates_2013, method = "pearson")
network_plot(correlations) + labs(title = "Network plot of correlations between model predictors for Guediawaye Census 2013")

ggsave(paste0(here::here(),"/output/output_model/img/6_corr_var.png"), width = 8, height = 5)
library(car)
vif(model_2013)
## fertlty Primair Moyen Secondr Ltrcy_F Ltrcy_A
## 5.367791 6.364877 8.106134 6.016109 7.975205 1.753154
## Ltrcy_W Ltrcy_M menage pct_cm_f pop_density
## 1.482194 1.495710 3.871359 1.078805 1.310951
vif(model_2002)
## fertlty Primair Moyen Secondr Ltrcy_F Ltrcy_A
## 12.816729 296.905235 62.394454 23.511511 676.243716 4.316810
## Ltrcy_W Ltrcy_M menage pct_cm_f pop_density
## 2.813059 2.154716 3.245263 1.192264 5.093219
Dimension reduction
with principal components analysis
pca_2013 <- prcomp(
formula = ~.,
data = dfw_estimates_2013,
scale. = TRUE,
center = TRUE
)
summary(pca_2013)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.1830 1.2702 1.1990 1.0093 0.86570 0.70864 0.63550
## Proportion of Variance 0.4332 0.1467 0.1307 0.0926 0.06813 0.04565 0.03671
## Cumulative Proportion 0.4332 0.5799 0.7106 0.8032 0.87133 0.91698 0.95369
## PC8 PC9 PC10 PC11
## Standard deviation 0.46543 0.35450 0.28978 0.28829
## Proportion of Variance 0.01969 0.01142 0.00763 0.00756
## Cumulative Proportion 0.97339 0.98481 0.99244 1.00000
##
pca_2002 <- prcomp(
formula = ~.,
data = dfw_estimates_2002,
scale. = TRUE,
center = TRUE
)
summary(pca_2002)
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Standard deviation 2.484 1.2669 1.1310 0.89828 0.6902 0.51195 0.42413
## Proportion of Variance 0.561 0.1459 0.1163 0.07335 0.0433 0.02383 0.01635
## Cumulative Proportion 0.561 0.7069 0.8232 0.89657 0.9399 0.96370 0.98005
## PC8 PC9 PC10 PC11
## Standard deviation 0.34446 0.24919 0.19411 0.03123
## Proportion of Variance 0.01079 0.00565 0.00343 0.00009
## Cumulative Proportion 0.99084 0.99649 0.99991 1.00000
pca_2013_tibble <- pca_2013$rotation %>%
as_tibble(rownames = "predictor")
pca_2013_tibble
## # A tibble: 11 × 12
## predictor PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 menage 0.411 0.0273 0.0207 -0.00165 -0.116 0.171 -0.100 0.860
## 2 Primair 0.391 0.00136 0.292 -0.134 -0.0931 0.278 -0.182 -0.321
## 3 Moyen 0.423 0.0591 -0.169 -0.0299 -0.123 -0.105 0.0880 -0.292
## 4 Secondr 0.359 0.0776 -0.404 0.0669 -0.0637 -0.350 0.241 0.0562
## 5 Ltrcy_F 0.419 -0.0249 -0.140 0.0607 0.118 -0.310 0.179 -0.148
## 6 pct_cm_f -0.0501 -0.160 0.0499 -0.944 0.0273 -0.226 0.135 0.0901
## 7 Ltrcy_A 0.187 -0.242 0.385 0.127 0.804 -0.193 -0.00979 0.0681
## 8 Ltrcy_W 0.00824 -0.670 -0.121 0.0919 -0.249 -0.329 -0.598 -0.0156
## 9 Ltrcy_M 0.00631 -0.670 -0.169 0.0737 -0.0179 0.453 0.556 -0.0105
## 10 fertlty 0.397 -0.0175 0.210 -0.142 -0.0923 0.369 -0.195 -0.177
## 11 pop_densi… 0.00230 -0.0771 0.682 0.178 -0.477 -0.355 0.370 0.0462
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
###
pca_2002_tibble <- pca_2002$rotation %>%
as_tibble(rownames = "predictor")
pca_2002_tibble
## # A tibble: 11 × 12
## predictor PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 menage 0.317 -0.151 0.0760 1.53e-1 0.743 -0.386 0.0986 -0.334
## 2 Primair 0.386 -0.0977 -0.0903 1.77e-2 0.0916 0.125 0.0829 0.551
## 3 Moyen 0.366 0.0740 0.250 2.23e-1 -0.182 0.0868 -0.0602 0.0971
## 4 Secondr 0.259 0.199 0.519 3.79e-1 -0.230 0.162 -0.0422 -0.401
## 5 Ltrcy_F 0.394 -0.0113 0.0838 1.19e-1 -0.0357 0.120 0.0212 0.326
## 6 pct_cm_f -0.0469 -0.168 -0.612 7.50e-1 -0.0961 0.0674 -0.0988 -0.0935
## 7 Ltrcy_A 0.342 -0.0715 -0.174 -2.34e-1 -0.311 -0.481 -0.664 -0.115
## 8 Ltrcy_W 0.190 0.586 -0.273 -7.74e-6 -0.233 -0.446 0.540 -0.00999
## 9 Ltrcy_M 0.0267 0.707 -0.206 -4.56e-2 0.388 0.348 -0.425 -0.0105
## 10 fertlty 0.373 -0.141 -0.163 -1.40e-1 0.140 0.135 0.0543 0.0967
## 11 pop_density 0.316 -0.157 -0.315 -3.60e-1 -0.148 0.462 0.229 -0.526
## # ℹ 3 more variables: PC9 <dbl>, PC10 <dbl>, PC11 <dbl>
# For 2002
pca_2002_graph <- pca_2002_tibble %>%
select(predictor:PC5) %>%
pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL, x = "Value", title = "Census 2002") +
theme_minimal()
# For 2013
pca_2013_graph <- pca_2013_tibble %>%
select(predictor:PC5) %>%
pivot_longer(PC1:PC5, names_to = "component", values_to = "value") %>%
ggplot(aes(x = value, y = predictor)) +
geom_col(fill = "darkgreen", color = "darkgreen", alpha = 0.5) +
facet_wrap(~component, nrow = 1) +
labs(y = NULL, x = "Value", title = "Census 2013") +
theme_minimal()
gridExtra::grid.arrange(pca_2002_graph,pca_2013_graph, nrow = 1, top="Loadings for first five principal components for Guediawaye ")

ggsave(paste0(here::here(),"/output/output_model/img/7_hist_pca_rgph.png"), width = 8, height = 5)
components_2013 <- predict(pca_2013, dfw_estimates_2013)
dfw_pca_2013 <- MPI_data_dr_2013_for_model%>%
select(MPI_men) %>%
cbind(components_2013)
ggplot(dfw_pca_2013, aes(fill = PC1)) +
geom_sf(color = NA) +
labs(title = "Map of principal component 1 for Guediawaye Census 2013") +
theme_void() +
scale_fill_viridis_c(direction = -1)

components_2002 <- predict(pca_2002, dfw_estimates_2002)
dfw_pca_2002 <- MPI_data_dr_2002_for_model%>%
select(MPI_men) %>%
cbind(components_2002)
ggplot(dfw_pca_2002, aes(fill = PC1)) +
geom_sf(color = NA) +
labs(title = "Map of principal component 1 for Guediawaye Census 2002") +
theme_void() +
scale_fill_viridis_c(direction = -1)

dfw_pca_2002$RGPH<-"Census 2002"
dfw_pca_2013$RGPH<-"Census 2013"
dfw_pca <- dfw_pca_2002 %>%
plyr::rbind.fill(dfw_pca_2013) %>%
st_as_sf()
label_pca <- paste0("Dimension ",1:11)
predictors_pca <- paste0("PC",1:11)
purrr::map(predictors_pca, function(curr_var){
curr_lab <- label_pca[match(as.character(sym(curr_var)), predictors_pca)]
curr_plot <- dfw_pca %>%
ggplot(aes(fill = !!sym(curr_var))) +
# Adding spatial features to the plot
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1)+
labs(title = curr_lab, fill = "values") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)
print(curr_plot)
ggsave(paste0(here::here(),"/output/output_model/img/8_values_",curr_lab,"_rgph.png"), width = 8, height = 5)
}, .progress = TRUE)











## [[1]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 1_rgph.png"
##
## [[2]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 2_rgph.png"
##
## [[3]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 3_rgph.png"
##
## [[4]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 4_rgph.png"
##
## [[5]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 5_rgph.png"
##
## [[6]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 6_rgph.png"
##
## [[7]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 7_rgph.png"
##
## [[8]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 8_rgph.png"
##
## [[9]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 9_rgph.png"
##
## [[10]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 10_rgph.png"
##
## [[11]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/8_values_Dimension 11_rgph.png"
# For the model output
# Saving in the "dfw_pca"
sf::st_write(obj=dfw_pca, paste0(here::here(), "/output/output_model/shp/dfw_pca.shp"), driver = "ESRI Shapefile", delete_layer = TRUE)
## Deleting layer `dfw_pca' using driver `ESRI Shapefile'
## Writing layer `dfw_pca' to data source
## `C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/shp/dfw_pca.shp' using driver `ESRI Shapefile'
## Writing 703 features with 13 fields and geometry type Polygon.
### For 2002
pca_2002_formula <- paste0("log(MPI_men) ~ ",
paste0('PC', 1:6, collapse = ' + '))
pca_2002_model <- lm(formula = pca_2002_formula, data = dfw_pca_2002)
### For 2013
pca_2013_formula <- paste0("log(MPI_men) ~ ",
paste0('PC', 1:6, collapse = ' + '))
pca_2013_model <- lm(formula = pca_2013_formula, data = dfw_pca_2013)
stargazer::stargazer(pca_2002_model, pca_2013_model, type="text", out = (paste0(here::here(),"/output/output_model/2_reg_PCA.html")))
##
## ===================================================================
## Dependent variable:
## -----------------------------------------------
## log(MPI_men)
## (1) (2)
## -------------------------------------------------------------------
## PC1 -0.034*** -0.054***
## (0.007) (0.007)
##
## PC2 -0.078*** -0.029**
## (0.013) (0.012)
##
## PC3 -0.185*** 0.190***
## (0.014) (0.013)
##
## PC4 -0.119*** 0.013
## (0.018) (0.015)
##
## PC5 0.209*** -0.025
## (0.023) (0.018)
##
## PC6 -0.254*** 0.161***
## (0.032) (0.022)
##
## Constant -2.373*** -2.199***
## (0.016) (0.015)
##
## -------------------------------------------------------------------
## Observations 268 435
## R2 0.617 0.446
## Adjusted R2 0.608 0.438
## Residual Std. Error 0.264 (df = 261) 0.319 (df = 428)
## F Statistic 70.003*** (df = 6; 261) 57.462*** (df = 6; 428)
## ===================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
Spatial
regression
MPI_data_dr_2002_for_model$residuals <- residuals(model_2002)
ggplot(MPI_data_dr_2002_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
labs(title = "Distribution of model residuals for Guediawaye Census 2002") +
theme_minimal()

MPI_data_dr_2013_for_model$residuals <- residuals(model_2013)
ggplot(MPI_data_dr_2013_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
labs(title = "Distribution of model residuals for Guediawaye Census 2013") +
theme_minimal()

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
dplyr::mutate(RGPH = "Census 2013"))
ggplot(MPI_data_dr_for_model, aes(x = residuals)) +
geom_histogram(bins = 100, alpha = 0.5, color = "navy",
fill = "navy") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggsave(paste0(here::here(),"/output/output_model/3_reg_lineaire_residuals.png"), width = 8, height = 5)
library(spdep)
wts_2002 <- MPI_data_dr_2002_for_model %>%
poly2nb() %>%
nb2listw()
moran.test(MPI_data_dr_2002_for_model$residuals, wts_2002)
##
## Moran I test under randomisation
##
## data: MPI_data_dr_2002_for_model$residuals
## weights: wts_2002
##
## Moran I statistic standard deviate = 2.1911, p-value = 0.01422
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.077202122 -0.003745318 0.001364795
wts_2013 <- MPI_data_dr_2013_for_model %>%
poly2nb() %>%
nb2listw()
moran.test(MPI_data_dr_2013_for_model$residuals, wts_2013)
##
## Moran I test under randomisation
##
## data: MPI_data_dr_2013_for_model$residuals
## weights: wts_2013
##
## Moran I statistic standard deviate = 4.7117, p-value = 1.228e-06
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic Expectation Variance
## 0.1325997182 -0.0023041475 0.0008197714
MPI_data_dr_2013_for_model$lagged_residuals <- lag.listw(wts_2013, MPI_data_dr_2013_for_model$residuals)
ggplot(MPI_data_dr_2013_for_model, aes(x = residuals, y = lagged_residuals)) +
theme_minimal() +
labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2013") +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")

MPI_data_dr_2002_for_model$lagged_residuals <- lag.listw(wts_2002, MPI_data_dr_2002_for_model$residuals)
ggplot(MPI_data_dr_2002_for_model, aes(x = residuals, y = lagged_residuals)) +
theme_minimal() +
labs(title = "Moran scatterplot of residual spatial autocorrelation for Guediawaye Census 2002") +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")

MPI_data_dr_for_model <- MPI_data_dr_2002_for_model %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(MPI_data_dr_2013_for_model %>%
dplyr::mutate(RGPH = "Census 2013"))
ggplot(MPI_data_dr_for_model, aes(x = residuals, y = lagged_residuals)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "red")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
panel.grid = element_line(color = "white", size = 0.8))

ggsave(paste0(here::here(),"/output/output_model/img/9_residuals_lagresiduals.png"), width = 8, height = 5)
Geographically weighted
regression
Choosing a bandwidth
for GWR
library(GWmodel)
library(sf)
dfw_data_sp_2002 <- MPI_data_dr_2002_for_model%>%
as_Spatial()
bw <- bw.gwr(
formula = formula_2002,
data = dfw_data_sp_2002,
kernel = "bisquare",
adaptive = TRUE
)
## Adaptive bandwidth: 173 CV score: 30.77821
## Adaptive bandwidth: 115 CV score: 22.62838
## Adaptive bandwidth: 78 CV score: 18.01934
## Adaptive bandwidth: 56 CV score: 20.35081
## Adaptive bandwidth: 92 CV score: 18.83631
## Adaptive bandwidth: 69 CV score: 18.26553
## Adaptive bandwidth: 83 CV score: 18.02488
## Adaptive bandwidth: 74 CV score: 18.01254
## Adaptive bandwidth: 72 CV score: 18.08195
## Adaptive bandwidth: 75 CV score: 17.91883
## Adaptive bandwidth: 76 CV score: 17.96413
## Adaptive bandwidth: 74 CV score: 18.01254
## Adaptive bandwidth: 75 CV score: 17.91883
dfw_data_sp_2013 <- MPI_data_dr_2013_for_model%>%
as_Spatial()
bw <- bw.gwr(
formula = formula_2013,
data = dfw_data_sp_2013,
kernel = "bisquare",
adaptive = TRUE
)
## Adaptive bandwidth: 276 CV score: 33.03686
## Adaptive bandwidth: 179 CV score: 32.93194
## Adaptive bandwidth: 117 CV score: 34.2892
## Adaptive bandwidth: 215 CV score: 32.81678
## Adaptive bandwidth: 240 CV score: 32.88297
## Adaptive bandwidth: 202 CV score: 32.80238
## Adaptive bandwidth: 191 CV score: 32.84514
## Adaptive bandwidth: 205 CV score: 32.78626
## Adaptive bandwidth: 211 CV score: 32.81625
## Adaptive bandwidth: 205 CV score: 32.78626
###
### 2002
gw_model_2002 <- gwr.basic(
formula = formula_2002,
data = dfw_data_sp_2002,
bw = bw,
kernel = "bisquare",
adaptive = TRUE
)
### 2013
gw_model_2013 <- gwr.basic(
formula = formula_2013,
data = dfw_data_sp_2013,
bw = bw,
kernel = "bisquare",
adaptive = TRUE
)
names(gw_model_2013)
## [1] "GW.arguments" "GW.diagnostic" "lm" "SDF"
## [5] "timings" "this.call" "Ftests"
##
names(gw_model_2002)
## [1] "GW.arguments" "GW.diagnostic" "lm" "SDF"
## [5] "timings" "this.call" "Ftests"
gw_model_2013_results <- gw_model_2013$SDF %>%
st_as_sf()
names(gw_model_2013_results)
## [1] "Intercept" "fertlty" "Primair" "Moyen"
## [5] "Secondr" "Ltrcy_F" "Ltrcy_A" "Ltrcy_W"
## [9] "Ltrcy_M" "menage" "pct_cm_f" "pop_density"
## [13] "y" "yhat" "residual" "CV_Score"
## [17] "Stud_residual" "Intercept_SE" "fertlty_SE" "Primair_SE"
## [21] "Moyen_SE" "Secondr_SE" "Ltrcy_F_SE" "Ltrcy_A_SE"
## [25] "Ltrcy_W_SE" "Ltrcy_M_SE" "menage_SE" "pct_cm_f_SE"
## [29] "pop_density_SE" "Intercept_TV" "fertlty_TV" "Primair_TV"
## [33] "Moyen_TV" "Secondr_TV" "Ltrcy_F_TV" "Ltrcy_A_TV"
## [37] "Ltrcy_W_TV" "Ltrcy_M_TV" "menage_TV" "pct_cm_f_TV"
## [41] "pop_density_TV" "Local_R2" "geometry"
###
gw_model_2002_results <- gw_model_2002$SDF %>%
st_as_sf()
names(gw_model_2002_results)
## [1] "Intercept" "fertlty" "Primair" "Moyen"
## [5] "Secondr" "Ltrcy_F" "Ltrcy_A" "Ltrcy_W"
## [9] "Ltrcy_M" "menage" "pct_cm_f" "pop_density"
## [13] "y" "yhat" "residual" "CV_Score"
## [17] "Stud_residual" "Intercept_SE" "fertlty_SE" "Primair_SE"
## [21] "Moyen_SE" "Secondr_SE" "Ltrcy_F_SE" "Ltrcy_A_SE"
## [25] "Ltrcy_W_SE" "Ltrcy_M_SE" "menage_SE" "pct_cm_f_SE"
## [29] "pop_density_SE" "Intercept_TV" "fertlty_TV" "Primair_TV"
## [33] "Moyen_TV" "Secondr_TV" "Ltrcy_F_TV" "Ltrcy_A_TV"
## [37] "Ltrcy_W_TV" "Ltrcy_M_TV" "menage_TV" "pct_cm_f_TV"
## [41] "pop_density_TV" "Local_R2" "geometry"
ggplot(gw_model_2002_results, aes(fill = Local_R2)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2002",fill="R-squared") +
theme_void()

ggplot(gw_model_2013_results, aes(fill = Local_R2)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
labs(title = "Local R-squared values from the GWR model for Guediawaye Census 2013",fill="R-squared") +
theme_void()

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = Local_R2)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c(direction = -1) +
facet_wrap(~RGPH) +
labs(fill="R-squared")+
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)

ggsave(paste0(here::here(),"/output/output_model/img/10_rsquared_rgph.png"), width = 8, height = 5)
# Saving the model output
# Saving in the "gw_model_results"
sf::st_write(obj=gw_model_results, paste0(here::here(), "/output/output_model/shp/gw_model_results.shp"), driver = "ESRI Shapefile", delete_layer = TRUE)
## Deleting layer `gw_model_results' using driver `ESRI Shapefile'
## Writing layer `gw_model_results' to data source
## `C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/shp/gw_model_results.shp' using driver `ESRI Shapefile'
## Writing 703 features with 43 fields and geometry type Polygon.
ggplot(gw_model_2002_results, aes(fill = menage)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
labs(title = "Local parameter estimates for household members for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Local β for \nHH")

ggplot(gw_model_2013_results, aes(fill = menage)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
labs(title = "Local parameter estimates for household members for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Local β for \nHH")

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
gw_model_results %>%
ggplot(aes(fill = menage)) +
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c(direction = -1) +
labs(fill = "Local β for \nHH")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)

ggsave(paste0(here::here(),"/output/output_model/img/11_local_beta_rgph.png"), width = 8, height = 5)
ggplot(gw_model_2002_results, aes(fill = fertlty)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
labs(title = "Local parameter estimates for population fertility for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Local β for \n household's fertility")

ggplot(gw_model_2013_results, aes(fill = fertlty)) +
geom_sf(color = NA) +
scale_fill_viridis_c(direction = -1) +
labs(title = "Local parameter estimates for population fertility for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Local β for \n household's fertility")

gw_model_results <- gw_model_2002_results %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(gw_model_2013_results %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
purrr::map(predictors_2002, function(curr_var){
curr_lab <- label_predictors_2002[match(as.character(sym(curr_var)), predictors_2002)]
curr_plot <- gw_model_results %>%
ggplot(aes(fill = !!sym(curr_var))) + # Use !!sym() to handle variable names dynamically
# Adding spatial features to the plot
geom_sf() +
scale_fill_viridis_c(direction = -1) +
#scale_fill_continuous(labels = function(x) format(x, scientific = TRUE))+
labs(
title = curr_lab,
fill = "Local β") +
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)
print(curr_plot)
ggsave(paste0(here::here(),"/output/output_model/img/12_beta_",curr_lab,"_rgph.png"), width = 8, height = 5)
}, .progress = TRUE)










## [[1]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Fertilité_rgph.png"
##
## [[2]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Niveau d'instruction: Primaire_rgph.png"
##
## [[3]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Niveau d'instruction: Moyen_rgph.png"
##
## [[4]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Niveau d'instruction: Secondaire_rgph.png"
##
## [[5]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Nombre de French dans le ménage_rgph.png"
##
## [[6]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Nombre de Arabic dans le ménage_rgph.png"
##
## [[7]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Nombre de Wolof dans le ménage_rgph.png"
##
## [[8]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Nombre de Mandingo dans le ménage_rgph.png"
##
## [[9]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Nombre de Soninke dans le ménage_rgph.png"
##
## [[10]]
## [1] "C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/img/12_beta_Nombre de ménage dans le DR_rgph.png"
Classification and
clustering of Guediawaye census data
Geodemographic
classification
set.seed(1994)
dfw_kmeans_2013 <- dfw_pca_2013 %>%
st_drop_geometry() %>%
select(PC1:PC8) %>%
kmeans(centers = 6)
table(dfw_kmeans_2013$cluster)
##
## 1 2 3 4 5 6
## 53 73 139 71 95 4
##
dfw_kmeans_2002 <- dfw_pca_2002 %>%
st_drop_geometry() %>%
select(PC1:PC8) %>%
kmeans(centers = 6)
table(dfw_kmeans_2002$cluster)
##
## 1 2 3 4 5 6
## 71 38 3 3 85 68
dfw_clusters_2002 <- dfw_pca_2002 %>%
mutate(cluster = as.character(dfw_kmeans_2002$cluster))
ggplot(dfw_clusters_2002, aes(fill = cluster)) +
geom_sf(size = 0.1) +
#scale_fill_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set3")+
labs(title = "Map of geodemographic clusters for Guediawaye Census 2002") +
theme_void() +
labs(fill = "Cluster ")

dfw_clusters_2013 <- dfw_pca_2013 %>%
mutate(cluster = as.character(dfw_kmeans_2013$cluster))
ggplot(dfw_clusters_2013, aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set3")+
labs(title = "Map of geodemographic clusters for Guediawaye Census 2013") +
theme_void() +
labs(fill = "Cluster ")

dfw_clusters <- dfw_clusters_2002 %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(dfw_clusters_2013 %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
dfw_clusters %>%
ggplot(aes(fill = cluster)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set3")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)

ggsave(paste0(here::here(),"/output/output_model/img/13_cluster_rgph.png"), width = 8, height = 5)
# Saving the model output
# Saving in the "dfw_clusters"
sf::st_write(obj=dfw_clusters, paste0(here::here(), "/output/output_model/shp/dfw_clusters_pca.shp"), driver = "ESRI Shapefile", delete_layer = TRUE)
## Deleting layer `dfw_clusters_pca' using driver `ESRI Shapefile'
## Writing layer `dfw_clusters_pca' to data source
## `C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/shp/dfw_clusters_pca.shp' using driver `ESRI Shapefile'
## Writing 703 features with 14 fields and geometry type Polygon.
library(plotly)
cluster_plot <- ggplot(dfw_clusters_2013,
aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_brewer(palette = "Set1") +
labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2013") +
theme_minimal()
ggplotly(cluster_plot) %>%
layout(legend = list(orientation = "h", y = -0.15,
x = 0.2, title = "Cluster"))
cluster_plot <- ggplot(dfw_clusters_2002,
aes(x = PC1, y = PC2, color = cluster)) +
geom_point() +
scale_color_brewer(palette = "Set1") +
labs(title = "Interactive scatterplot of PC1 and PC2 colored by cluster for Guediawaye Census 2002") +
theme_minimal()
ggplotly(cluster_plot) %>%
layout(legend = list(orientation = "h", y = -0.15,
x = 0.2, title = "Cluster"))
Spatial clustering
& regionalization
library(spdep)
input_vars <- dfw_pca_2013 %>%
select(PC1:PC8) %>%
st_drop_geometry() %>%
as.data.frame()
skater_nbrs <- poly2nb(dfw_pca_2013, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)
regions <- skater(
mst[,1:2],
input_vars,
ncuts = 5,
crit = 10
)
dfw_clusters_2013$region <- as.character(regions$group)
ggplot(dfw_clusters_2013, aes(fill = region)) +
geom_sf(size = 0.1) +
labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2013") +
scale_fill_brewer(palette = "Set3")+
theme_void()

input_vars <- dfw_pca_2002 %>%
select(PC1:PC8) %>%
st_drop_geometry() %>%
as.data.frame()
skater_nbrs <- poly2nb(dfw_pca_2002, queen = TRUE)
costs <- nbcosts(skater_nbrs, input_vars)
skater_weights <- nb2listw(skater_nbrs, costs, style = "B")
mst <- mstree(skater_weights)
regions <- skater(
mst[,1:2],
input_vars,
ncuts = 5,
crit = 10
)
dfw_clusters_2002$region <- as.character(regions$group)
ggplot(dfw_clusters_2002, aes(fill = region)) +
geom_sf(size = 0.1) +
labs(title = "Map of contiguous regions derived with the SKATER algorithm for Guediawaye Census 2002") +
scale_fill_brewer(palette = "Set3")+
theme_void()

dfw_clusters <- dfw_clusters_2002 %>%
dplyr::mutate(RGPH = "Census 2002") %>%
plyr::rbind.fill(dfw_clusters_2013 %>%
dplyr::mutate(RGPH = "Census 2013")) %>%
st_as_sf()
dfw_clusters %>%
ggplot(aes(fill = region)) +
geom_sf(size = 0.1) +
scale_fill_brewer(palette = "Set3")+
facet_wrap(~RGPH) +
# Adjusting the plot theme
theme_map(base_size = 8) +
theme(panel.background = element_rect(),
#legend.background = element_blank(),
axis.ticks = element_blank(),
axis.text = element_blank(),
legend.position = "bottom",
text = element_text(size = 8),
#panel.grid = element_line(color = "white", size = 0.8),
legend.box.background = element_rect(),
legend.box.margin = margin(6, 6, 6, 6),
plot.background = element_rect(fill = "white"))+
ggspatial::annotation_scale(
location = "br",
bar_cols = c("grey60", "white"),
text_family = "ArcherPro Book"
) +
ggspatial::annotation_north_arrow(
location = "tl", which_north = "true",
#pad_x = unit(0.4, "in"), pad_y = unit(0.4, "in"),
#which_north = "grid",
height = unit(1, "cm"),
width = unit(1, "cm"),
)

ggsave(paste0(here::here(),"/output/output_model/img/14_dfw_pca_rgph.png"), width = 8, height = 5)
# Saving the model output
# Saving in the "dfw_clusters"
sf::st_write(obj=dfw_clusters, paste0(here::here(), "/output/output_model/shp/dfw_clusters.shp"), driver = "ESRI Shapefile", delete_layer = TRUE)
## Deleting layer `dfw_clusters' using driver `ESRI Shapefile'
## Writing layer `dfw_clusters' to data source
## `C:/Users/ASUS/Desktop/projet_sn/Analysing-Senegal-Census-Data/output/output_model/shp/dfw_clusters.shp' using driver `ESRI Shapefile'
## Writing 703 features with 15 fields and geometry type Polygon.